America has 61 national parks. They have grown in size and popularity over the years.
library(devtools)
library(tidyverse)
library(gganimate)
library(gifski)
library(png)
library(here)
library(lemon)
library(RColorBrewer)
library(scales)
library(janitor)
library(ggrepel)
library(ggvoronoi)
# library(ggtext)
library(rvest)
library(fuzzyjoin)
library(jkmisc)
library(ragg)
library(plotly)
This is a demostration of using the janitor package
We will now import the datasets as prepared and cleaned by the tidytuesday folks
park_visits <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-09-17/national_parks.csv")
state_pop <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-09-17/state_pop.csv")
gas_price <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-09-17/gas_price.csv")
park_locations <- read_csv("data/NationalParksLatLong.csv")
national_parks.csv| variable | class | description |
|---|---|---|
| year_raw | integer | Year of record |
| gnis_id | character | ID for shapefile and long-lat lookup |
| geometry | character | Geometric shape for shapefile |
| metadata | character | URL to metadata about the park |
| number_of_records | double | Number of records |
| parkname | character | Full park name |
| region | character | US Region where park is located |
| state | character | State abbreviation |
| unit_code | character | Park code abbreviation |
| unit_name | character | Park Unit name |
| unit_type | character | Park unit type |
| visitors | double | Number of visitors |
state_pop.csv| variable | class | description |
|---|---|---|
| year | integer | Jan 1st of year |
| state | character | State abbreviation |
| pop | double | Population |
gas_price.csv| variable | class | description |
|---|---|---|
| year | double | Year (Jan 1st) |
| gas_current | double | Gas price in that year (dollars/gallon) |
| gas_constant | double | Gas price (constant 2015 dollars/gallon) |
Source: https://github.com/spren9er/tidytuesday/blob/master/tidytuesday_201938_national_park_visits.r Improvments seen below are discussed here: https://evancanfield.netlify.com/posts/2019-09-16-national-parks-tidytuesday/tidy-tuesday-38/
park_visits <- park_visits %>%
mutate(
parkname =
if_else(
is.na(parkname),
str_trim(str_remove(unit_name, 'National Park')),
parkname
)
)
highlight_parks <- c(
'GREAT SMOKY MOUNTAINS',
'GRAND CANYON',
'ROCKY MOUNTAIN',
'YOSEMITE',
'YELLOWSTONE',
'ZION',
'ACADIA',
'DENALI',
'HOT SPRINGS',
'CARLSBAD CAVERNS',
'GREAT BASIN'
)
highlight_colors <- c(
'#223e15',
'#176785',
'#499989',
'#5fa73f',
'#ff8706',
'#ff534e',
'#f5b901',
'#9a91fa',
'#c988d2',
'#6da5c2',
'#fe43bc'
)
ranking_parks <- park_visits %>%
filter(
year != 'Total',
unit_type == 'National Park',
!is.na(parkname),
unit_name != 'Denali National Preserve'
) %>%
mutate(
year = as.integer(year),
parkname = str_to_upper(parkname)
) %>%
filter(year < 2016) %>%
group_by(year) %>%
arrange(year, desc(visitors)) %>%
mutate(rank = row_number()) %>%
ungroup()
top_parks <- ranking_parks %>%
filter(parkname %in% highlight_parks) %>%
mutate(parkname = fct_relevel(str_to_upper(parkname), highlight_parks)) %>%
arrange(year, desc(parkname))
other_parks <- ranking_parks %>%
filter(!parkname %in% highlight_parks)
animation <- top_parks %>%
ggplot(aes(x = year, y = rank, group = parkname, color = parkname)) +
geom_line(
data = other_parks, size = 0.5, show.legend = FALSE, color = '#dadada'
) +
geom_line(show.legend = FALSE, size = 0.8) +
geom_text(
aes(x = year + 0.8, label = parkname),
size = 4.5, show.legend = FALSE, hjust = 0, fontface = 'bold'
) +
scale_x_continuous(breaks = c(1925, 1950, 1975, 2000)) +
scale_y_continuous(
breaks = c(1, 25, 50), labels = c('1ˢᵗ', '25ᵗʰ', '50ᵗʰ'), trans = 'reverse'
) +
scale_color_manual(values = highlight_colors) +
coord_cartesian(clip = 'off') +
transition_reveal(year, keep_last = TRUE) +
labs(
title = 'The most popular national park_visits',
subtitle = 'National park_visits ranked by number of visitors in a given year',
x = '',
y = 'Rank',
caption = '#tidytuesday 38|2019 • © 2019 spren9er'
) +
theme(
plot.background = element_rect(fill = '#f0f0f0'),
plot.margin = margin(t = 40, r = 155, b = 20, l = 20),
plot.title = element_text(
margin = margin(b = 8), size = 38, hjust = -0.17, face = 'bold',
color = '#333333'
),
plot.subtitle = element_text(
margin = margin(t = 6, b = 5), size = 29, hjust = -0.72,
face = 'plain', color = '#333333'
),
plot.caption = element_text(
color = '#999999', size = 13, margin = margin(t = 10), hjust = 0.5,
face = 'plain', family = 'Decima Mono Pro'
),
panel.background = element_rect(fill = '#f0f0f0'),
panel.grid.major = element_line(size = 0.5, color = '#d3d3d3'),
panel.border = element_blank(),
axis.text.x = element_text(
family = 'Decima Mono Pro', color = '#999999', face = 'plain', size = 20,
margin = margin(t = 6)
),
axis.text.y = element_text(
family = 'Decima Mono Pro', color = '#999999', face = 'plain', size = 20,
margin = margin(r = 6)
),
axis.title.y = element_text(color = '#333333', face = 'bold', size = 16)
)
animate(animation, width = 1000, height = 1000, end_pause = 30)
anim_save('images/tidytuesday_201938_national_park_visits.gif')
source: https://github.com/gkaramanis/tidytuesday/blob/master/week-38/national-parks-lines.R
pv_ch <- park_visits %>%
distinct(year, unit_name, unit_type, visitors) %>%
filter(unit_type == "National Park" & year != "Total") %>%
mutate(year = as.numeric(year)) %>%
group_by(unit_name) %>%
arrange(year, .by_group = TRUE) %>%
mutate(yoy = visitors - lag(visitors)) %>%
filter(unit_name != "Denali National Preserve")
pv_ch %>%
ggplot() +
geom_line(aes(x = year, y = yoy, group = unit_name), size = 0.2) +
scale_x_continuous(breaks = seq(1910, 2010, 10), expand = expand_scale(add = c(5, 1))) +
scale_y_continuous(breaks = c(-3000000, 0, 1000000), labels = paste0(c(-3000000, 0, 1000000)/1000000, "M")) +
facet_wrap(vars(unit_name), ncol = 3) +
labs(
title = "National Park Visits 1904–2016",
subtitle = "Year-over-year change in total visits by park",
caption = "Source: dataisplural/data.world | Graphic: Georgios Karamanis"
) +
theme_void(base_family = "IBM Plex Sans") +
theme(
legend.position = "none",
legend.title = element_text(size = 20, color = "grey20"),
legend.margin = margin(0, 0, 20, 0),
plot.background = element_rect(fill = "grey80", color = NA),
strip.background = element_rect(fill = "grey80", color = NA),
strip.text = element_text(family = "IBM Plex Sans Bold", color = "grey30",
hjust = 1, vjust = 1),
plot.title = element_text(size = 28, color = "grey20", family = "IBM Plex Sans Medium"),
plot.subtitle = element_text(size = 20, color = "grey20", margin = margin(5, 0, 30, 0)),
plot.caption = element_text(size = 8, color = "grey30", margin = margin(20, 0, 0, 0)),
axis.text.x = element_text(family = "IBM Plex Mono", size = 7, color = "grey40"),
axis.text.y = element_text(family = "IBM Plex Mono", size = 7, color = "grey40"),
panel.grid.major.x = element_line(color = "grey75"),
panel.grid.major.y = element_line(color = "grey75"),
plot.margin = margin(20, 20, 20, 20)
) +
# save image
ggsave(
here::here("images", paste0("national-parks", format(Sys.time(), "%Y%m%d_%H%M%S"), ".png")),
width = 18, height = 14, dpi = 320
)
## Warning: Removed 60 rows containing missing values (geom_path).
## Warning: Removed 60 rows containing missing values (geom_path).
source: https://github.com/gkaramanis/tidytuesday/blob/master/week-38/national-parks-mean-pct.R
pv_ch <- park_visits %>%
distinct(year, unit_name, unit_type, visitors) %>%
filter(unit_type == "National Park" & year != "Total") %>%
mutate(year = as.numeric(year)) %>%
group_by(unit_name) %>%
filter(unit_name != "Denali National Preserve") %>%
mutate(
mean_visitors = mean(visitors),
mean_pct = log10(visitors/mean_visitors)
) %>%
add_tally()
ggplot(pv_ch) +
geom_segment(aes(x = year, xend = year,
y = 0, yend = 0.5, color = cut_number(mean_pct, 10)), size = 1.4) +
scale_x_continuous(breaks = seq(1910, 2010, 20), expand = expand_scale(add = c(5, 1))) +
scale_y_continuous(expand = c(0.05, 0.25)) +
facet_wrap(vars(fct_reorder(unit_name, -n)), ncol = 3) +
#scale_color_viridis(option = "plasma", discrete = TRUE) +
labs(
title = "National Park Visits, 1904–2016",
subtitle = "Percentage of all-time average number of visits, by year",
caption = "Source: dataisplural/data.world | Graphic: Georgios Karamanis"
) +
guides(color = guide_legend(
# title.position = "top",
label.position = "bottom",
nrow = 1,
title = NULL,
barwidth = 20,
barheight = 0.5
)) +
theme_void(base_family = "IBM Plex Sans") +
theme(
legend.position = "top",
legend.title = element_text(size = 20, color = "grey20"),
legend.margin = margin(0, 0, 20, 0),
plot.background = element_rect(fill = "grey80", color = NA),
strip.background = element_rect(fill = "grey80", color = NA),
strip.text = element_text(family = "IBM Plex Sans Bold", color = "grey30",
hjust = 1, vjust = 1),
plot.title = element_text(size = 28, color = "grey20", family = "IBM Plex Sans Medium"),
plot.subtitle = element_text(size = 20, color = "grey20"),
plot.caption = element_text(size = 8, color = "grey30", margin = margin(20, 0, 0, 0)),
axis.text.x = element_text(family = "IBM Plex Mono", size = 7, color = "grey40"),
panel.grid.major.x = element_line(color = "grey75"),
plot.margin = margin(20, 20, 20, 20)
) +
# save image
ggsave(
here::here("images", paste0("national-parks", format(Sys.time(), "%Y%m%d_%H%M%S"), ".png")),
width = 18, height = 14, dpi = 320
)
Source: https://github.com/gkaramanis/tidytuesday/blob/master/week-38/national-parks.R
pv_ch <- park_visits %>%
distinct(year, unit_name, unit_type, visitors) %>%
filter(unit_type == "National Park" & year != "Total") %>%
mutate(year = as.numeric(year)) %>%
group_by(unit_name) %>%
filter(unit_name != "Denali National Preserve") %>%
mutate(
mean_visitors = mean(visitors),
mean_pct = log10(visitors/mean_visitors),
mean_pct_cut = cut_interval(mean_pct, 10)
) %>%
add_tally()
## Warning in mutate_impl(.data, dots, caller_env()): Unequal factor levels:
## coercing to character
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
## Warning in mutate_impl(.data, dots, caller_env()): binding character and
## factor vector, coercing into character vector
ggplot(pv_ch) +
geom_segment(aes(x = year, xend = year,
y = 0, yend = 0.5, color = mean_pct_cut), size = 1.4) +
scale_x_continuous(breaks = seq(1910, 2010, 20), expand = expand_scale(add = c(5, 1))) +
scale_y_continuous(expand = c(0.05, 0.25)) +
facet_wrap(vars(fct_reorder(unit_name, -n)), ncol = 3) +
scale_colour_viridis_d(option = "plasma") +
labs(
title = "National Park Visits, 1904–2016",
subtitle = "Percentage of all-time average number of visits, by year",
caption = "Source: dataisplural/data.world | Graphic: Georgios Karamanis"
) +
guides(color = guide_colorbar(
title.position = "top",
label.position = "top",
title = NULL,
barwidth = 20,
barheight = 0.5
)) +
theme_void(base_family = "IBM Plex Sans") +
theme(
legend.position = "none",
legend.title = element_text(size = 20, color = "grey20"),
legend.margin = margin(0, 0, 20, 0),
plot.background = element_rect(fill = "grey80", color = NA),
strip.background = element_rect(fill = "grey80", color = NA),
strip.text = element_text(family = "IBM Plex Sans Bold", color = "grey30",
hjust = 1, vjust = 1),
plot.title = element_text(size = 28, color = "grey20", family = "IBM Plex Sans Medium"),
plot.subtitle = element_text(size = 20, color = "grey20"),
plot.caption = element_text(size = 8, color = "grey30", margin = margin(20, 0, 0, 0)),
axis.text.x = element_text(family = "IBM Plex Mono", size = 7, color = "grey40"),
panel.grid.major.x = element_line(color = "grey75"),
plot.margin = margin(20, 20, 20, 20)
) +
# save image
ggsave(
here::here("images", paste0("national-parks", format(Sys.time(), "%Y%m%d_%H%M%S"), ".png")),
width = 18, height = 14, dpi = 320
)
Source: https://github.com/jkaupp/tidytuesdays/blob/master/2019/week38/R/analysis.R
# Get park fees
fees_page <- "https://www.nps.gov/aboutus/entrance-fee-prices.htm"
parks <- read_html(fees_page) %>%
html_nodes("h3") %>%
html_text() %>%
.[-1:-2]
park_fees <- read_html(fees_page) %>%
html_nodes(".table-wrapper > table") %>%
html_table() %>%
map(~set_names(.x, c("date", "park_specific_annual_pass", "per_vehicle", "per_person", "per_motorcycle"))) %>%
map2(parks, ~mutate(.x, park = .y)) %>%
bind_rows() %>%
filter(date == "Current") %>%
rename(park_name = park) %>%
mutate(park_name = stringi::stri_trans_general(park_name, id = "Latin-ASCII"),
park_name = str_replace(park_name, "Hawai'i", "Hawaii"))
summary_report <- read_csv(here("data/annual_summary_report.csv")) %>% clean_names()
park_revenue_data <- summary_report %>%
filter(year == 2018) %>%
mutate(visitors = recreation_visitors + non_recreation_visitors) %>%
select(year, park_name, visitors) %>%
mutate(park_name = str_remove(park_name, "[A-Z]{2,}"),
park_name = str_remove(park_name, "& PRES"),
park_name = trimws(park_name)) %>%
regex_left_join(park_fees, ., ignore_case = TRUE) %>%
distinct(year, park_name.x, .keep_all = TRUE) %>%
filter(str_detect(park_name.x, "Park"), !str_detect(park_name.x, "Great Falls")) %>%
mutate(revenue = visitors * parse_number(per_person)) %>%
rename(park_name = park_name.x) %>%
select(-park_name.y)
## Joining by: "park_name"
plot <- ggplot(park_revenue_data, aes(x = fct_reorder(park_name, revenue), y = revenue)) +
geom_col(fill = "#5e81ac", size = 0.1) +
coord_flip() +
scale_y_continuous(labels = scales::dollar, expand = c(0.01,0)) +
labs(title = "Estimated National Park Revenue from Fees for 2018",
subtitle = str_wrap("Illustrated below is a bar chart of fee revenue from US National Parks in 2018. Estimated Revenue calculated using per person admittance rates and total park visitors.", 95),
caption = "Data: www.nps.gov | Graphic: @jakekaupp",
x = NULL,
y = NULL) +
theme_jk(grid = "X") +
theme(plot.background = element_rect(fill = "#2e3440"),
text = element_text(color = "#eceff4"),
panel.grid = element_line(color = "#e5e9f0"),
axis.text.x = element_text(color = "#eceff4"),
axis.text.y = element_text(color = "#eceff4"))
ggsave(here("images", "tw_38plot.png"), plot, width = 10, height = 8, device = agg_png())
Using the park name, or any location name, and the Geocoding API from Google, we got the longitude and latitude for each parks locations.
# library(ggmap)
# unique_locations <- summary_report %>% distinct(park_name)
# locations_df <- mutate_geocode(unique_locations, park_name)
# write_csv(locations_df, 'data/park_locations_from_google_api.csv')
trial <- summary_report %>%
mutate(visitors = recreation_visitors + non_recreation_visitors) %>%
select(year, park_name, visitors)
park_locations <- read_csv('data/park_locations_from_google_api.csv')
colnames(park_locations)[colnames(park_locations)=="lon"] <- "long"
plot_data <- trial %>%
regex_left_join(park_locations, ., ignore_case = TRUE)
plot_data <- plot_data %>% filter(long > -128, lat < 51, lat > 24)
plot_data <- plot_data[complete.cases(plot_data), ] %>% group_by(year, park_name.x)
split_by_year <- plot_data %>% split(plot_data$year)
park_names_over_the_years <- split_by_year$`1979`$park_name.x
for (i in split_by_year) {
park_names_over_the_years <- intersect(park_names_over_the_years, i$park_name.x)
}
plot_data <- plot_data %>% filter(park_name.x %in% park_names_over_the_years)
plot_data$year <- as.integer(plot_data$year)
plot_data$visitors <- scale(plot_data$visitors)
plot_data <- subset(plot_data, select = -c(park_name.y))
colnames(plot_data)[colnames(plot_data)=="park_name.x"] <- "park_name"
library(maps)
us_states <- map_data("state")
ditch_the_axes <- theme(
axis.text = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.grid = element_blank(),
axis.title = element_blank()
)
g <- ggplot(data = us_states) +
geom_polygon(aes(x = long, y = lat, group = group), fill=NA, color= 'grey') +
coord_fixed(1.3) +
geom_point(data = plot_data, aes(x = long, y = lat, color = visitors, size = visitors)) +
guides(size=FALSE) +
scale_colour_gradient(low="#ffffff", high="#0072B2") +
ditch_the_axes +
theme(
# Change plot and panel background
panel.background = element_rect(fill = 'black')
)
g + transition_time(year) +
labs(title = "Year: {frame_time}, Park visits, scaled")
g
anim_save("images/park_visits_by_year.gif")